## R code

library(foreign)
library(arm)
########################################################
#### Create the final dataset.                       ###
#### This doesn't go into public replication files.  ###
########################################################

# Folder holding the project's input and output files. The trailing slash is
# required because file names are appended to it with paste0().
loc <- "/Users/cwarshaw/Dropbox/Projects/americanideologyproject/Cities/"

# City-level institutions data (Stata .dta file).
data <- read.dta(paste0(loc, "cities_analysis_institutions.dta"))
dim(data)

# Repair the misspelled column name carried by the input file.
colnames(data)[colnames(data) == "emplooyees_fte"] <- "employees_fte"

# Attach the posterior estimates by city_id (presumably the source of the
# jags.* columns used below -- confirm against the CSV). merge() defaults to
# an inner join, so cities missing from either input are dropped; the dim()
# calls before and after show how many rows survive.
posteriors <- read.csv(paste0(loc, "cities_posteriors_basic.csv"))
data <- merge(data, posteriors, by = "city_id")
dim(data)

# Sanity check: correlation between jags.mean and pres_2008 over the rows
# where both are observed.
cor(data$jags.mean, data$pres_2008, use = "complete.obs")

## Binary at-large indicator: 1 when perc_atlarge2 equals 1, 0 when it is
## below 1, and NA otherwise (perc_atlarge2 missing or above 1).
data$pal_binary <- NA
atlarge_share <- data$perc_atlarge2
data$pal_binary[which(atlarge_share == 1)] <- 1
data$pal_binary[which(atlarge_share < 1)] <- 0

## Drop duplicate records that can be identified by gov_id.
## As with the one-id-at-a-time filters this replaces, rows whose gov_id is
## NA are removed as well (subset() treats an NA condition as FALSE).
dim(data)

## Illinois: Waukegan (143049016) and Bloomington (143057801) each appear as
## both a township and a city; drop the township and keep the city.
il_township_ids <- c("143049016", "143057801")
data <- subset(data, !is.na(gov_id) & !(gov_id %in% il_township_ids))
dim(data)

## Michigan:
##  - Clinton: both records are townships; drop the one that doesn't match
##    the 2007 CoG (233046004).
##  - Forest Hills: includes duplicates for several neighboring townships;
##    drop them (233041008, 233041001).
## NOTE(review): 233046004 was listed under both Clinton and Forest Hills --
## confirm that no other Forest Hills id was intended.
mi_township_ids <- c("233046004", "233041008", "233041001")
data <- subset(data, !is.na(gov_id) & !(gov_id %in% mi_township_ids))

## Drop "odd duplicate" rows that can only be identified by city name + state.
##
## drop_duplicate_rows(df, city, abb, positions)
##   df        data frame with `city` and `abb` columns
##   city, abb values identifying the duplicated place
##   positions which of the matching rows to remove, counted in row order
##             (positions = 2 removes the second matching row)
##   returns   df without the requested rows; row names are preserved
##
## Positions beyond the number of matching rows are skipped with a warning.
## Passing them straight to `[` is unsafe: an all-NA index replaces the data
## frame with NA rows without any error, NA mixed with negative subscripts is
## an error, and an empty negative index returns zero rows.
drop_duplicate_rows <- function(df, city, abb, positions) {
  matches <- which(df$city == city & df$abb == abb)
  targets <- matches[positions]
  n_missing <- sum(is.na(targets))
  if (n_missing > 0L) {
    warning(
      sprintf(
        "%s, %s: asked to drop match(es) %s but only %d row(s) matched; skipping %d",
        city, abb, paste(positions, collapse = ","), length(matches), n_missing
      ),
      call. = FALSE
    )
  }
  targets <- targets[!is.na(targets)]
  if (length(targets) == 0L) {
    return(df)
  }
  df[-targets, , drop = FALSE]
}

## Harrison NY, includes odd duplicates
data <- drop_duplicate_rows(data, "Harrison", "NY", 2:4)

## Shawnee OK, includes odd duplicates
data <- drop_duplicate_rows(data, "Shawnee", "OK", 2)

## Keizer OR, includes odd duplicates
data <- drop_duplicate_rows(data, "Keizer", "OR", 2)
dim(data)

## Back Mountain PA, includes odd duplicates
data <- drop_duplicate_rows(data, "Back Mountain", "PA", 2:6)

## Buffalo Grove IL, includes odd duplicates
data <- drop_duplicate_rows(data, "Buffalo Grove", "IL", 2)

## San Pablo CA, includes odd duplicates
data <- drop_duplicate_rows(data, "San Pablo", "CA", 2)

## Union NJ, includes odd duplicates
data <- drop_duplicate_rows(data, "Union", "NJ", 2)

## Chandler AZ includes odd duplicates
data <- drop_duplicate_rows(data, "Chandler", "AZ", 2)



## Coerce the analysis variables to numeric. as.vector() is applied first so
## that a factor column is converted through its labels rather than its
## integer codes.
numeric_cols <- c(
  "partisan_elections2", "initiative2", "term_limits2", "fog2",
  "city_pop", "house_value", "mrp_ideology", "median_income",
  "percent_black", "pal_binary"
)
for (col_name in numeric_cols) {
  data[[col_name]] <- as.numeric(as.vector(data[[col_name]]))
}

## Attach the region variables by `abb`. merge() defaults to an inner join,
## so rows whose abb is absent from the regions file are dropped.
regions <- read.dta(paste0(loc, "regions_political.dta"))
data <- merge(data, regions, by = "abb")

## Rescale population, income and house value to hundreds of their
## original units.
for (col_name in c("city_pop", "median_income", "house_value")) {
  data[[col_name]] <- data[[col_name]] / 100
}

## Discard columns that are not part of the final dataset. The existing
## mrp_ideology column is removed here so that its name can be reused for
## jags.mean below.
data <- subset(
  data,
  select = -c(
    mrp_ideology, police_capita, welfare_capita, salestax_capita,
    average_payroll_fulltime, term_limits_check, updated_termlimits,
    updated, updated_fog, updated_initiative, updated_partisanelections,
    updated_percatlarge, v16, gov2, offcycle_elections2
  )
)

## Give the jags.* columns their mrp_ideology names.
new_names <- c(
  jags.mean  = "mrp_ideology",
  jags.lower = "mrp_ideology_lower",
  jags.upper = "mrp_ideology_upper",
  jags.sd    = "mrp_ideology_sd"
)
for (old_name in names(new_names)) {
  names(data)[names(data) == old_name] <- new_names[[old_name]]
}

## Same sanity check as after the posterior merge, now under the new name.
cor(data$mrp_ideology, data$pres_2008, use = "complete.obs")

## Multiply the per-capita fiscal variables by 1000 so that units are now
## actual dollars.
for (col_name in c("taxes_capita", "expenditures_capita")) {
  data[[col_name]] <- data[[col_name]] * 1000
}

## Export the final dataset as CSV (row names are written as the first
## column, write.csv's default) and as an .RData file holding `data`.
write.csv(data, file = paste0(loc, "cities_finaldata.csv"))

save(data, file = paste0(loc, "cities_140430.RData"))